r knitr::opts_chunk$set(echo=FALSE, message=FALSE, warning=FALSE, comment=NA)

Workspace initialization

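Set the working directory of this document to the project directory that contains the `Data/` folder (which holds the historical database) and the `Functions/` folder; every path below is relative to it.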

# Workspace set-up: clear the session, attach packages, load helpers and data.
# Statement order matters here (later packages/sources mask earlier names).
# NOTE(review): rm(list=ls()) wipes every object in the calling environment,
# including anything defined before this document is run -- confirm intended.
rm(list=ls())
library(dplyr)
library(ggplot2)
library(gridExtra)
# Project-local helper script. The pretty() calls used for the facet labels
# further down presumably resolve to a function defined here (which would mask
# base::pretty) -- TODO confirm against ./Functions/pretty.R.
source("./Functions/pretty.R")

# Load data files
# NOTE(review): full.df is used throughout but never assigned in this script,
# so it is presumably restored by this load() call -- verify.
load("./Data/HistoricalDatabase.RData")
# Despite the .df suffix this is a character vector, one element per line.
hodge.df <- readLines("./Data/hodge.txt", n = -1)
scopus.df <- read.csv("./Data/scopus.csv")
abb.df <- read.csv("./Data/master.csv")
sangam.df <- read.csv("./Data/sangam.csv")

# Create colour palette for consistent plotting
# http://colorbrewer2.org/?type=diverging&scheme=PuOr&n=4
cp <- c("#fdb864", "#e66101", "#b2abd2", "#5e3c99" )

Descriptive summaries

# ---------------------------------------
# Descriptive counts
# ---------------------------------------

# Number of rows whose attribute is "article" (NA attributes are not counted).
numberArticles <- sum(full.df$attributes == "article", na.rm = TRUE)

# One-row table holding the number of distinct journal titles.
numberJournals <- full.df %>%
  filter(attributes == "journal") %>%
  summarise(Unique = n_distinct(record))

# The distinct journal titles themselves.
unique.swHistory <- full.df %>%
  filter(attributes == "journal") %>%
  distinct(record)

# Entries of hodge.df that do / do not occur among the journal titles.
journal.titles <- unique.swHistory[["record"]]
hodge.swHistory.intersect <- intersect(hodge.df, journal.titles)
hodge.swHistory.diff <- setdiff(hodge.df, journal.titles)

Table 1

# ---------------------------------------
# Table 1. Journal summary
# ---------------------------------------
# One row per journal: abbreviation, record count, first and last year.

# Record count per journal title; Hodge.list is "Y" when the title occurs in
# hodge.df and "N" otherwise.
unique.titles <- full.df %>%
  filter(attributes == "journal") %>%
  group_by(record) %>%
  mutate(Hodge = ifelse(record %in% hodge.df, "Y", "N")) %>%
  summarise(N = n(), Hodge.list = max(Hodge))

# Publication-year rows, grouped by year value.
unique.year <- full.df %>%
  filter(attributes == "pubYear") %>%
  group_by(record)

# Journal rows with the value column renamed to `title`.
n.so <- full.df %>%
  filter(attributes == "journal") %>%
  mutate(title = record) %>%
  select(-attributes, -record)

# Publication-year rows with the value column renamed to `year`.
n.yr <- full.df %>%
  filter(attributes == "pubYear") %>%
  mutate(year = record) %>%
  select(-attributes, -record)

# Pair every journal row with its year (joined on the columns the two tables
# share), then reduce to one row per journal, largest journals first.
n.so.yr <- n.so %>%
  left_join(n.yr) %>%
  group_by(title) %>%
  summarise(first = min(year), last = max(year), n = n()) %>%
  arrange(desc(n)) %>%
  rename(Journal = title) %>%
  mutate(index = "index_variable")

# Attach the journal abbreviations and keep only the published columns.
table1 <- n.so.yr %>%
  left_join(abb.df) %>%
  select(-X) %>%
  select(Journal, Abbrev, n, first, last)

# Summary statistics of the per-journal record counts.
records.per.journal <- table1[["n"]]
sum_Articles <- sum(records.per.journal)
median_Articles <- median(records.per.journal)
mean_Articles <- mean(records.per.journal)
sd_Articles <- sd(records.per.journal)

print(table1)

Figure 1 - Small Multiples

This figure is for the manuscript. Note that its rendering is optimized for the manuscript rather than for online viewing.

# Yearly record counts per journal in the historical database (HD).
# N is the count for one journal-year, Overall.N the journal's total.
sm.df <- n.so %>%
  left_join(n.yr) %>%
  group_by(title, year) %>%
  summarize(N = n()) %>%
  mutate(Overall.N = sum(N), year = as.numeric(year)) %>%
  ungroup() %>%
  arrange(desc(Overall.N), title) %>%
  rename(Year = year, Journal = title)

# HD counts in long format, tagged with their source so they can be stacked
# under the Scopus counts (rbind() needs both tables to share column names).
sm.df.reduced <- select(sm.df, -Overall.N)

scopus.df$DataBase <- rep("Scopus", nrow(scopus.df))
DataBase <- rep("HD", nrow(sm.df.reduced))
sm.df.reduced$DataBase <- DataBase
sm.df.reduced <- rename(sm.df.reduced, Documents = N)
hd.documents <- sm.df.reduced$Documents
# Zero counts become NA and are then dropped together with any other
# incomplete row.
sm.df.reduced$Documents <- ifelse(hd.documents == 0, NA, hd.documents)
sm.df.reduced <- na.omit(sm.df.reduced)

combined <- rbind(scopus.df, sm.df.reduced)

# Journals shown in the manuscript figure, in facet order.
paper.journals <- c(
  "Families in Society",
  "Social Work",
  "British Journal of Social Work",
  "Journal of Gerontological Social Work",
  "Research on Social Work Practice",
  "Social Work in Health Care",
  "International Social Work",
  "Health & Social Work",
  "Journal of Social Work Education"
)
in.paper <- combined$Journal %in% paper.journals
combined_reduced <- combined[in.paper, ]
combined_reduced$Journal <- factor(combined_reduced$Journal, levels = paper.journals)

# Scopus facet labels: per-journal totals over records up to 2013.
# NOTE(review): `combined` was built before this year filter, so the plotted
# Scopus lines are not restricted to <= 2013 -- confirm that is intended.
scopus.df$Year <- as.numeric(scopus.df$Year)
scopus.df <- filter(scopus.df, Year <= 2013)
scopus_annotate <- scopus.df %>%
  group_by(Journal) %>%
  summarize(N = pretty(sum(Documents)))

in.figure <- scopus_annotate$Journal %in% combined_reduced$Journal
scopus_annotate_paper <- scopus_annotate[in.figure, ]
scopus_annotate_paper$label <- paste0("N=", pretty(scopus_annotate_paper$N))

# HD facet labels: per-journal totals of the plotted HD counts.
hd_annotate <- filter(combined_reduced, DataBase == "HD") %>%
  group_by(Journal) %>%
  summarize(N = sum(Documents)) %>%
  mutate(label = paste0("N=", pretty(N)))

# Figure 1: yearly article counts per journal, HD versus Scopus, one facet per
# journal.
#
# Exactly one colour scale is added. ggplot2 keeps only the last scale
# registered for an aesthetic, so stacking a second colour scale would merely
# replace the first and emit a "Scale for colour is already present" message.
#
# The two annotate("segment") layers draw a small in-panel key (black = HD at
# y = 140, grey = Scopus at y = 120); the geom_text layers print the matching
# per-journal totals next to those key lines.
small_multiples_paper <- ggplot(combined_reduced, aes(Year, Documents, colour = DataBase)) +
  geom_line(size = .5) +
  facet_wrap(~ Journal) +
  theme_bw() +
  theme(strip.text.x = element_text(size = 14)) +
  theme(legend.position = "bottom", legend.text = element_text(size = 14)) +
  scale_colour_manual(values = c("black", "#999999"),
    labels = c("Social Work Research Database", "Scopus"),
    guide = guide_legend(title = "")) +
  ylab("Number of Articles") +
  xlab("\nYear") +
  annotate("segment", x = 1992, xend = 1995, y = 140, yend = 140, colour = "black", size = 1, alpha = .80) +
  annotate("segment", x = 1992, xend = 1995, y = 120, yend = 120, colour = "#999999", size = 1, alpha = .80) +
  geom_text(data = scopus_annotate_paper, aes(x = 1999, y = 120, label = label), colour = "black", size = 5) +
  geom_text(data = hd_annotate, aes(x = 1999, y = 140, label = label), colour = "black", size = 5)

#------------Create high quality jpeg-----------------

#ggsave(small_multiples_paper, file = "Figure_1_small_multiples.jpg", dpi =  1200, height = 8, width = 12, units = "in")

print(small_multiples_paper)

Small Multiples - Online Supplement

# Create an initial data frame from HD: one row per journal-year with its
# record count (N) and the journal's total over all years (Overall.N).
sm.df <- left_join(n.so, n.yr) %>%
  group_by(title, year) %>%
  summarize(N = n()) %>%
  # summarize() peels off the `year` grouping level, so sum(N) is per title
  mutate(Overall.N = sum(N), year = as.numeric(year)) %>%
  ungroup() %>%
  arrange(desc(Overall.N), title) %>%
  rename(Year = year, Journal = title)

# Restrict Scopus to the study window (records up to 2013).
scopus.df$Year <- as.numeric(scopus.df$Year)
scopus.df <- filter(scopus.df, Year <= 2013)

# Join HD with Scopus and join abbreviation names.
# A full join keeps journal-years present in only one of the two sources.
sm.scopus.merged <- full_join(sm.df, scopus.df, by = c("Journal", "Year")) %>%
  select(Year, Journal, N, Overall.N, Documents)

# Per-journal totals from each source, largest HD journals first.
sm.scopus.merged.table <- group_by(sm.scopus.merged, Journal) %>%
  summarize(DataBase.N = sum(N, na.rm = TRUE), Scopus.N = sum(Documents, na.rm = TRUE)) %>%
  arrange(desc(DataBase.N))

# seq_len() rather than 1:nrow(): the latter yields c(1, 0) for an empty table.
abb.df$ID <- seq_len(nrow(abb.df))

# X is presumably a row-number column carried over from the CSV export --
# TODO confirm. Zero Scopus counts are blanked so they leave gaps in the lines.
sm.scopus.merged <- full_join(sm.scopus.merged, abb.df, by = "Journal") %>%
  select(-X) %>%
  mutate(Documents = ifelse(Documents == 0, NA, Documents))


# Re-order the factor by Overall.N so facets run from largest to smallest
# journal.
title <- sm.scopus.merged %>% arrange(desc(Overall.N)) %>%
  distinct(Abbrev) %>%
  filter(!is.na(Abbrev)) %>%
  distinct()

title.order <- title$Abbrev

sm.scopus.merged$Abbrev <- factor(sm.scopus.merged$Abbrev, levels = title.order)
# Scopus total per journal, repeated on every row of that journal.
sm.scopus.merged <- group_by(sm.scopus.merged, Journal) %>%
  mutate(Overall.Scopus = sum(Documents, na.rm = TRUE)) %>%
  ungroup()


 # Create labels for the facets
# One row per abbreviation, keeping the journal-level totals the labels are
# built from. `.keep_all = TRUE` is required: without it distinct() returns
# only the Abbrev column, and Overall.N / Overall.Scopus below would be NULL.
Overall.N <- distinct(sm.scopus.merged, Abbrev, .keep_all = TRUE)

# Two-line facet label: HD total on top, Scopus total underneath.
lab1 <- gsub(" ", "", paste0("N=", pretty(Overall.N$Overall.N)))
lab2 <- gsub(" ", "", paste0("N=", pretty(Overall.N$Overall.Scopus)))
lab <- paste(lab1, lab2, sep = "\n")

# Every label sits at the same position inside its facet. The position vectors
# are sized from the data instead of assuming a fixed number of journals.
xpos <- rep(1999, length(lab))
ypos <- rep(190, length(lab))
ldata <- data.frame(xpos, ypos, lab, Abbrev = Overall.N$Abbrev)

# Drop label rows without an abbreviation (they cannot be placed in a facet).
ldata <- na.omit(ldata)

# Split the journals into two pages of facets: the first 40 and the rest.
# head()/tail() return shorter (possibly empty) vectors when there are fewer
# than 41 titles, instead of NA padding or a reversed index range.
titles.reduced <- unique(sm.scopus.merged$Journal)
titles.reduced.1 <- head(titles.reduced, 40)
titles.reduced.2 <- tail(titles.reduced, -40)
cp <- c("#fdb864", "#e66101", "#b2abd2", "#5e3c99" )

# Plotting function
#
# Builds one page of the online-supplement small multiples.
#   plot.data   rows of sm.scopus.merged for the journals on this page
#   label.data  rows of ldata with the facet labels for this page
#   part        page number, appended to the plot title
# Returns a ggplot object. The orange line is the HD count (N), the purple
# line the Scopus count (Documents); the two short segments drawn by
# annotate() act as an in-panel key next to the "N=" labels.
# NOTE(review): the purple "#533c99" differs by one digit from the palette
# entry "#5e3c99" in `cp` -- confirm which one is intended.
buildSmallMultiples <- function(plot.data, label.data, part) {
  ggplot(plot.data, aes(Year, N)) +
    geom_line(colour = "#e66101") +
    geom_line(aes(Year, Documents), colour = "#533c99", alpha = .80) +
    facet_wrap(~ Abbrev) +
    theme_bw() +
    theme(strip.text.x = element_text(size = 8)) +
    theme(legend.position = "bottom",
          panel.grid.minor = element_blank(),
          axis.text.x = element_text(angle = 45, hjust = 1, size = 8)) +
    geom_text(data = label.data, aes(x = xpos, y = ypos, label = lab), size = 2.5) +
    ylim(c(0, 225)) +
    scale_x_continuous(breaks = seq(1989, 2015, 8)) +
    annotate("segment", x = 1989, xend = 1992, y = 210, yend = 210, colour = "#e66101") +
    annotate("segment", x = 1989, xend = 1992, y = 170, yend = 170, colour = "#533c99") +
    ggtitle(paste0("Number of Article Records by Journal, 1989 - 2013 (Part ", part, ")"))
}

# Page 1: first block of journals with label rows 1-40.
plotSmallMultiples.1 <- buildSmallMultiples(
  sm.scopus.merged[sm.scopus.merged$Journal %in% titles.reduced.1, ],
  ldata[1:40, ],
  part = 1
)

# Page 2: remaining journals with label rows 41-80.
plotSmallMultiples.2 <- buildSmallMultiples(
  sm.scopus.merged[sm.scopus.merged$Journal %in% titles.reduced.2, ],
  ldata[41:80, ],
  part = 2
)

print(plotSmallMultiples.1)
print(plotSmallMultiples.2)

# Arguments are named in full; `file =` only worked through partial matching
# of ggsave()'s `filename` argument.
ggsave(filename = "smallMultiples1.jpeg", plot = plotSmallMultiples.1,
       width = 10, height = 7, units = "in", dpi = 1200)

ggsave(filename = "smallMultiples2.jpeg", plot = plotSmallMultiples.2,
       width = 10, height = 7, units = "in", dpi = 1200)

Figure 2 - Growth of Social Work Research

# ---------------------------------------
# Journals per year
# ---------------------------------------

# Journal title of every article.
n.journals.year <- full.df %>%
  filter(attributes == "journal") %>%
  select(articleID, record)

# Publication year of every article.
n.year <- full.df %>%
  filter(attributes == "pubYear") %>%
  select(articleID, record)

# After the join record.x is the journal title and record.y the year;
# count the distinct journals observed in each year.
n.journals.year <- n.journals.year %>%
  left_join(n.year, by = "articleID", copy = TRUE) %>%
  group_by(record.y) %>%
  summarise(Journal.count = n_distinct(record.x)) %>%
  mutate(record.y = as.numeric(record.y))



# Projected counts under constant compound annual growth.
#   start    value in the first period
#   rate     growth per period as a proportion (0.047 = 4.7%)
#   n.years  length of the series, first period included
# Returns a numeric vector of length n.years whose k-th element is
# start * (1 + rate)^(k - 1).
compoundGrowth <- function(start, rate, n.years) {
  start * (1 + rate)^(seq_len(n.years) - 1)
}

# Both reference curves start at 31 and span 25 periods.
exp.047 <- compoundGrowth(31, .047, 25)
exp.0307 <- compoundGrowth(31, .0307, 25)


# Attach the two reference growth curves to the observed counts.
# (Assignment fails unless the table has exactly as many rows as the curves.)
n.journals.year$Price <- exp.047
n.journals.year$SWHD <- exp.0307
n.journals.year <- mutate(n.journals.year, Journal.count = as.numeric(Journal.count))
n.journals.year <- as.data.frame(n.journals.year)

# Reference points: the starting value, the observed counts in rows 17 and 25
# (paired with 2005 and 2013 in `years`), and the end point of the 4.7% curve.
# The last value reads the `Price` column; the table has no `exp.05` column,
# so max(n.journals.year$exp.05) evaluated to max(NULL), i.e. -Inf with a
# warning.
years <- c(1989, 2005, 2013, 2013)
points <- c(31, n.journals.year$Journal.count[c(17, 25)], max(n.journals.year$Price))
annotate.df <- data.frame(years, points)


# Named palette for the three series.
cols <- c("Observed Growth" = cp[1], "3.07% CAGR" = cp[4], "4.7% CAGR" = cp[3])


# Figure 2: observed number of journals per year (thick black line) against
# the two compound-growth reference curves and a linear fit (grey lines).
#
# Line colours are set as constants, not mapped. A `colour = "<label>"` inside
# aes() is discarded whenever the same layer also sets `colour` as a fixed
# parameter, so such mappings would add nothing but stale legend labels.
# Each series is identified by the text annotations placed right of 2013,
# which is why the x axis extends to 2020.
figure2 <- ggplot(n.journals.year, aes(x = record.y, y = Journal.count)) +
    theme_bw() +
    theme(axis.text.x = element_text(size=8),
    title = element_text(size = 10), plot.title=element_text(size=10)) +
    xlab("Year") +
    ylab("Journals") +
    scale_y_continuous(breaks = c(seq(0, 100, 10)), limits = c(20, 95)) +
    theme(axis.title.x = element_text(vjust=-0.5), axis.title.y = element_text(vjust=.75)) +
    # 4.7% compound growth reference curve
    geom_line(aes(y = Price), colour = "#999999", alpha = .80) +
    # 3.07% compound growth reference curve
    geom_line(aes(y = SWHD), colour = "#999999", alpha = .80) +
    # observed journal counts
    geom_line(colour = "black", size = 1.5) +
    # linear trend through the observed counts
    stat_smooth(method = "lm", se = FALSE, colour = "#999999") +
    annotate("text", x = 2014.5, y = 60, label = "3.07% CAGR", size = 4) +
    annotate("text", x = 2016, y = 74.5, label = "Linear growth", size = 4) +
    annotate("text", x = 2015.5, y = 95, label = "4.7% CAGR", size = 4) +
    annotate("text", x = 2016.5, y = 67, label = "Observed growth", size = 4) +
    scale_x_continuous(limits = c(1989, 2020), breaks = c(1989, 1993, 1997, 2001, 2005, 2009, 2013)) +
    #ggtitle("Figure 2. Observed Versus Estimated Growth of Journals (1989-2013)") +
    theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank(),
          panel.border = element_blank(), axis.line = element_line(),
          axis.text.x = element_text(size = 16),
          axis.text.y = element_text(size = 16),
          axis.title.x = element_text(size = 16),
          axis.title.y = element_text(size = 16)) +
    theme(plot.title = element_text(size = 19))

#ggsave(figure2, file = "figure2.jpeg", width = 7, height = 7, units = "in", dpi = 1200)

print(figure2)
# Rebuild the journals-per-year table from full.df. Reassigning
# n.journals.year replaces whatever columns were added to it earlier, leaving
# only record.y (year) and Journal.count before yearCount is appended.
n.journals.year <- filter(full.df, attributes == "journal") %>%
    select(articleID, record)

n.year <- filter(full.df, attributes == "pubYear") %>%
    select(articleID, record)

# record.x = journal title, record.y = publication year after the join.
n.journals.year <- left_join(n.journals.year, n.year, by = "articleID", copy=TRUE) %>%
    group_by(record.y) %>%
    summarise(Journal.count = n_distinct(record.x)) %>%
    mutate(record.y = as.numeric(record.y))

# Running index of the rows (1 = first row). seq_len() is used instead of
# 1:nrow(), which would yield c(1, 0) for an empty table.
n.journals.year$yearCount <- seq_len(nrow(n.journals.year))

Figure 3 Annual Change

# Year-over-year percentage change in the number of journals.
journal.counts <- n.journals.year$Journal.count
curr <- journal.counts[-1]                       # every year but the first
prev <- journal.counts[-length(journal.counts)]  # every year but the last

# The first year has no predecessor, so its change is recorded as 0.
annualChange.numeric <- c(0, 100 * (curr - prev) / prev)
# Only strictly negative changes count as a decrease; 0 is an "Increase".
annualChange.dichotomous <- ifelse(annualChange.numeric < 0, "Decrease", "Increase")

annualChange.df <- data.frame(annualChange.numeric, annualChange.dichotomous)
annualChange.df$year <- 1989:2013

# Figure 3: bar chart of the annual percentage change in the number of
# journals; black bars mark increases, grey bars decreases. The white
# horizontal rules are drawn over the bars and stand in for grid lines, and
# the grey rule marks zero.
figure3 <- ggplot(
    annualChange.df,
    aes(x = year, y = annualChange.numeric, fill = annualChange.dichotomous)
  ) +
  theme_bw() +
  geom_bar(stat = "identity") +
  geom_hline(yintercept = seq(-5, 15, 5), col = "white", lwd = 0.4) +
  geom_hline(yintercept = 0, col = "gray", lwd = 0.6, alpha = .8) +
  scale_fill_manual(values = c("black", "#999999"), limits = c("Increase", "Decrease")) +
  scale_x_continuous(breaks = seq(1989, 2015, 4)) +
  scale_y_continuous(breaks = seq(-10, 15, 5)) +
  xlab("Year") +
  ylab("Percent change") +
  #ggtitle("Figure 3. Annual Percentage Change in the Number Journals (1989 - 2013)") +
  theme(
    # legend tucked into the top-right corner of the panel, without a title
    legend.position = c(1, 1),
    legend.justification = c(1, 1),
    legend.title = element_blank(),
    # bare panel: axis lines only
    panel.grid.major = element_blank(),
    panel.grid.minor = element_blank(),
    panel.border = element_blank(),
    axis.line = element_line(),
    axis.text.x = element_text(size = 16),
    axis.text.y = element_text(size = 16),
    axis.title.x = element_text(size = 16),
    axis.title.y = element_text(size = 16),
    plot.title = element_text(size = 19)
  )

#ggsave(figure3, file = "figure3.jpeg", width = 7, height = 7, units = "in", dpi=1200)

print(figure3)


bryanvictor/BibWrangleR documentation built on May 13, 2019, 8:11 a.m.